Import Libraries: Load the required packages and set the working directory.
library(tidyr)
library(dbplyr)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.1 ✔ purrr 1.0.1
✔ forcats 1.0.0 ✔ readr 2.1.4
✔ ggplot2 3.4.2 ✔ stringr 1.5.0
✔ lubridate 1.9.2 ✔ tibble 3.2.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::ident() masks dbplyr::ident()
✖ dplyr::lag() masks stats::lag()
✖ dplyr::sql() masks dbplyr::sql()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(ggplot2)
library(ggfortify)
library(plotly)
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
library(forecast)
Registered S3 method overwritten by 'quantmod':
method from
as.zoo.data.frame zoo
Registered S3 methods overwritten by 'forecast':
method from
autoplot.Arima ggfortify
autoplot.acf ggfortify
autoplot.ar ggfortify
autoplot.bats ggfortify
autoplot.decomposed.ts ggfortify
autoplot.ets ggfortify
autoplot.forecast ggfortify
autoplot.stl ggfortify
autoplot.ts ggfortify
fitted.ar ggfortify
fortify.ts ggfortify
residuals.ar ggfortify
# NOTE(review): absolute, machine-specific path. The relative file names used
# later in this script (the CSV that is read and the PNG files that are saved)
# resolve against this directory, so the script only runs as-is on a machine
# where this folder exists.
setwd('C:/Users/cy_su/PycharmProjects/DSCI_605_Data_Visualizations/Module 5/M5_Lab4/')
Read the data from the CSV file: Convert the “Date” column to a datetime format. Filter the data for the desired two-year period and the BATTERY crime type.
# Load the crime records and parse the timestamp column into POSIXct.
# ymd_hms() uses UTC unless a time zone is supplied.
data <- read.csv("Fouryears_all.csv")
data$Date <- ymd_hms(data$Date)

# Two-year study window; both bounds are inclusive.
start_date <- as.Date("2019-09-01")
end_date <- as.Date("2021-09-01")

# `data$Date` is POSIXct while the bounds above are Date objects. Base R has no
# shared comparison method for these two classes and can fall back to comparing
# the raw numbers (seconds since epoch vs. days since epoch), so the bounds are
# converted to UTC midnight date-times before filtering.
window_start <- as_datetime(start_date)
window_end <- as_datetime(end_date)

# `filter` is masked several times in this session, so name the dplyr verb
# explicitly. Comma-separated conditions are combined with AND.
filtered_data <- data %>%
  dplyr::filter(
    Primary.Type == "BATTERY",
    Date >= window_start,
    Date <= window_end
  )
Data Wrangling: Aggregate the data by hourly intervals. Display the aggregated data.
# Count incidents per clock hour: each timestamp is truncated to the start of
# its hour and the rows falling in that hour are counted.
data_hourly <- filtered_data %>%
  group_by(Hour = floor_date(Date, unit = "hour")) %>%
  summarise(Count = n(), .groups = "drop")

# Hours without any incident produce no row above. The series is later handed
# to ts()/msts() with fixed seasonal periods, which assume one observation per
# hour, so the missing hours are inserted with an explicit count of zero.
# The guard keeps an empty input from failing inside min()/max()/seq().
if (nrow(data_hourly) > 0) {
  data_hourly <- data_hourly %>%
    complete(
      Hour = seq(min(Hour), max(Hour), by = "hour"),
      fill = list(Count = 0L)
    )
}
Time Series Visualization: Create a time series object. Plot the time series. Show the plot. Save the image.
What I did to improve the visualization: 1. Changed the line color and type. 2. Added point density. 3. Adjusted the legend and title positions. 4. Added a color palette.
# Time series figure of hourly battery-incident counts: a smoothed trend line
# (mapped to the "Metrics" colour legend) with semi-transparent points for the
# individual hourly counts.
plot <- ggplot(data = data_hourly, aes(x = Hour, y = Count)) +
  # `linewidth` replaces `size` for line geoms (deprecated in ggplot2 3.4.0).
  geom_smooth(
    aes(color = "Count"),
    se = FALSE,
    linewidth = 1.2,
    linetype = "solid"
  ) +
  geom_point(size = 2, color = "blue", alpha = 0.5) +
  # One break per quarter: a daily break over a two-year window would place
  # roughly 730 overlapping labels on the axis.
  scale_x_datetime(date_labels = "%b %Y", date_breaks = "3 months") +
  scale_color_brewer(palette = "Set1", name = "Metrics") +
  # Title and subtitle corrected: the caption cites the Chicago Police
  # Department as the source, and the counts are aggregated per hour.
  labs(
    x = "Date (Month Year)",
    y = "Count",
    title = "2-Year Series of Battery Crimes In Chicago",
    subtitle = "Incident Counts Aggregated by Hour",
    caption = "Source: Chicago Police Department’s CLEAR (Citizen Law Enforcement Analysis and Reporting) System"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 10, color = "darkblue"),
    axis.title = element_text(size = 12, face = "bold"),
    plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, face = "italic", hjust = 0.5),
    plot.caption = element_text(size = 8, hjust = 1),
    legend.position = "bottom",
    # element_line() takes `linewidth`; its `size` argument is deprecated.
    panel.grid.major = element_line(color = "gray", linewidth = 0.1),
    panel.grid.minor = element_line(color = "gray", linewidth = 0.05),
    plot.background = element_rect(fill = "aliceblue")
  ) +
  # The legend key is a line, so its thickness is set through `linewidth`.
  guides(color = guide_legend(override.aes = list(linewidth = 4)))
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.
Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
ℹ Please use the `linewidth` argument instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.
# Save the time series figure. The plot object is passed explicitly so the
# saved image does not depend on whichever plot ggplot2 last recorded.
ggsave("ts_plot.png", plot = plot)
Saving 7 x 5 in image
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# Convert the ggplot object into a plotly object.
plotly_plot <- ggplotly(plot)
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# Evaluate the object at top level so that it is printed/displayed.
plotly_plot
Decompose the Time Series: Convert decomposed object to a data frame. Plot the data using autoplot. Save the image.
# Build a time series from the hourly counts with 24 observations per cycle
# (adjust the frequency as needed), then split it with decompose().
data_ts <- ts(data_hourly[["Count"]], frequency = 24)
data_ts_decomposed <- decompose(data_ts)

# Draw the decomposition, show it, and write the same figure to disk.
decomposed_plot <- autoplot(data_ts_decomposed)
decomposed_plot
ggsave("ts_decomposed_plot.png", plot = decomposed_plot)
Saving 7 x 5 in image
Create an MSTS Object: Decompose the object. Plot the object. Save the image.
# Seasonal periods in number of hourly observations:
# 24 (one day), 168 (seven days) and 720 (thirty days).
seasonal_hours <- 24 * c(1, 7, 30)

# Multi-seasonal time series of the hourly counts and its mstl() decomposition.
battery_msts <- msts(data_hourly[["Count"]], seasonal.periods = seasonal_hours)
battery_msts_decompose <- mstl(battery_msts)

# Draw the decomposition, show it, and write the same figure to disk.
msts_plot <- autoplot(battery_msts_decompose)
msts_plot
ggsave("ts_msts_plot.png", plot = msts_plot)
Saving 7 x 5 in image